In [1]:
import numpy as np
from sklearn import linear_model
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline

Generalized Linear Models

1) Ordinary Least Squares


In [2]:
dataset = datasets.load_diabetes()
X = dataset.data[:, np.newaxis, 2]    # keep a single feature (column 2) as a 2-D array
Y = dataset.target.reshape(442, 1)    # reshape the target into a column vector

print(X.shape)
print(Y.shape)


(442, 1)
(442, 1)
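
For reference (a quick check, not in the original notebook), the column selected above (index 2) is the body-mass-index feature:

In [ ]:
# Names of the ten diabetes features; X above keeps only index 2 ('bmi')
print(dataset.feature_names)
print(dataset.feature_names[2])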

In [29]:
# Implementation: ordinary least squares fitted by batch gradient descent
b = np.zeros((X.shape[1], 1))    # intercept
W = np.zeros((X.shape[1], 1))    # weight for the single feature
learning_rate = 0.001
iterations = 10000
for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)                 # predictions
    err = Y - y_hat                              # residuals
    error = np.sum(np.square(err), axis = 0)     # sum of squared errors (tracked only)
    
    db = -2*np.sum(err)                          # gradient w.r.t. the intercept
    dW = -2*np.sum(np.dot(X.T, err), axis = 0)   # gradient w.r.t. the weight
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[949.43525847]]
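
As a sanity check (a minimal sketch, not part of the original notebook), the same fit can be obtained in closed form from the normal equations by stacking a column of ones onto X for the intercept:

In [ ]:
# Closed-form OLS via least squares on [1, X]; theta[0] is the intercept, theta[1] the slope
X_aug = np.hstack([np.ones((X.shape[0], 1)), X])
theta, *_ = np.linalg.lstsq(X_aug, Y, rcond = None)
print(theta)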

In [17]:
# Using sklearn
reg = linear_model.LinearRegression(fit_intercept = True, normalize = False, n_jobs = -1)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[949.43526038]]

In [29]:
plt.plot(X, Y, 'bo')                 # actual target values
plt.plot(X, y_hat, color = 'red')    # fitted regression line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()


2) Ridge Regression


In [30]:
# Implementation: ridge regression (L2 penalty) fitted by batch gradient descent
b = np.zeros((X.shape[1], 1))
W = np.zeros((X.shape[1], 1))
learning_rate = 0.001
iterations = 10000
alpha = 0.8      # regularization strength

for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)
    err = Y - y_hat
    error = np.sum(np.square(err), axis = 0) + alpha*np.sum(np.square(W))   # SSE + L2 penalty
    
    db = -2*np.sum(err)                                       # the intercept is not penalized
    dW = -2*np.sum(np.dot(X.T, err), axis = 0) + 2*alpha*W    # gradient includes the penalty term
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[527.46403355]]
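
For comparison (a minimal sketch, not in the original notebook), ridge regression with an unpenalized intercept also has a closed-form solution on centered data, which should agree with the iterative result above:

In [ ]:
# Closed-form ridge: center X and Y, solve (Xc'Xc + alpha*I) w = Xc'Yc, then recover the intercept
Xc = X - X.mean(axis = 0)
Yc = Y - Y.mean(axis = 0)
w_ridge = np.linalg.solve(Xc.T @ Xc + alpha*np.eye(X.shape[1]), Xc.T @ Yc)
b_ridge = Y.mean() - X.mean(axis = 0) @ w_ridge
print(b_ridge, w_ridge)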

In [48]:
# Using sklearn
reg = linear_model.Ridge(alpha = 0.8, fit_intercept = True, normalize = False, max_iter = 10000, solver = 'auto')
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[527.46403355]]

In [52]:
plt.plot(X, Y, 'bo')                 # actual target values
plt.plot(X, y_hat, color = 'red')    # fitted ridge line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()



In [57]:
# To do cross-validation over alpha in ridge regression by hand, just wrap the
# implementation above in a loop over candidate alphas and keep the one with the
# best validation error (a sketch follows below)

# In sklearn use RidgeCV
reg = linear_model.RidgeCV(alphas = (0.1, 0.2, 0.3, 0.4), fit_intercept = True, normalize = False, scoring = None, cv = None, gcv_mode = None)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_, reg.alpha_)


[152.13348416] [[863.12296399]] 0.1
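
A minimal sketch of the manual loop mentioned in the comment above, using Ridge and cross_val_score (the candidate alphas and cv = 5 are illustrative choices, not from the original notebook):

In [ ]:
from sklearn.model_selection import cross_val_score

best_alpha, best_score = None, -np.inf
for a in (0.1, 0.2, 0.3, 0.4):
    # mean R^2 over 5 cross-validation folds for this value of alpha
    score = cross_val_score(linear_model.Ridge(alpha = a), X, Y.ravel(), cv = 5).mean()
    if score > best_score:
        best_alpha, best_score = a, score
print(best_alpha, best_score)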

3) Lasso


In [76]:
# Implementation: lasso by subgradient descent, updating all parameters in each iteration
b = np.zeros((X.shape[1], 1))
W = np.zeros((X.shape[1], 1))
learning_rate = 0.1
alpha = 0.8
iterations = 10000

for _ in range(iterations):
    y_hat = b + np.dot(X, W.T)
    err = Y - y_hat
    error = np.sum(np.square(err), axis = 0) / (2*X.shape[0]) + alpha*np.sum(np.abs(W), axis = 0)   # MSE/2 + L1 penalty

    # index 0 is the intercept, indices 1..d are the weights
    for a in range(W.shape[0] + 1):
        if a == 0:
            db = (-1/(2*X.shape[0])) * (2*np.sum(err, axis = 0))
            b = b - learning_rate*db
        else:
            # subgradient of alpha*|w|: +alpha when w >= 0, -alpha when w < 0
            if W[a-1, 0] >= 0:
                dw_a = (-1/(2*X.shape[0])) * (2*np.sum(np.dot(err.T, X[:, a-1].reshape(-1, 1)), axis = 0)) + alpha
                W[a-1, 0] = W[a-1, 0] - learning_rate*dw_a
            else:
                dw_a = (-1/(2*X.shape[0])) * (2*np.sum(np.dot(err.T, X[:, a-1].reshape(-1, 1)), axis = 0)) - alpha
                W[a-1, 0] = W[a-1, 0] - learning_rate*dw_a
                
print(b, W)


[[152.13348416]] [[533.82717351]]

In [79]:
# Implementation: lasso by subgradient descent, updating a single parameter per iteration (cyclically)
b = np.zeros((X.shape[1], 1))
W = np.zeros((X.shape[1], 1))
learning_rate = 0.2
alpha = 0.8
iterations = 40000

a = 0    # index of the parameter to update: 0 is the intercept, 1..d are the weights

for _ in range(iterations):
    y_hat = b + np.dot(X, W.T)
    err = Y - y_hat
    error = np.sum(np.square(err), axis = 0) / (2*X.shape[0]) + alpha*np.sum(np.abs(W), axis = 0)

    if a == W.shape[0]+1:    # cycle back to the intercept after the last weight
        a = 0
    
    if a == 0:
        db = (-1/(2*X.shape[0])) * (2*np.sum(err, axis = 0))
        b = b - learning_rate*db
        a += 1
    else:
        # subgradient of alpha*|w|: +alpha when w >= 0, -alpha when w < 0
        if W[a-1, 0] >= 0:
            dw_a = (-1/(2*X.shape[0])) * (2*np.sum(np.dot(err.T, X[:, a-1].reshape(-1, 1)), axis = 0)) + alpha
            W[a-1, 0] = W[a-1, 0] - learning_rate*dw_a
            a += 1
        else:
            dw_a = (-1/(2*X.shape[0])) * (2*np.sum(np.dot(err.T, X[:, a-1].reshape(-1, 1)), axis = 0)) - alpha
            W[a-1, 0] = W[a-1, 0] - learning_rate*dw_a
            a += 1
                
print(b, W)


[[152.13348416]] [[595.76544198]]

In [81]:
reg = linear_model.Lasso(alpha = 0.8, fit_intercept = True, normalize = False, precompute = False, max_iter = 40000, positive = False, selection = 'cyclic')
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)
# precompute -> whether to use a precomputed Gram matrix to speed up calculations
# tol -> stop iterating once the updates become smaller than tol


[152.13348416] [595.83526038]
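
As an aside (a minimal sketch, not in the original notebook), with a single feature the lasso coefficient can also be written in closed form as a soft-thresholding step on centered data, using the objective sklearn minimizes, (1/(2n))*||y - Xw||^2 + alpha*||w||_1 with an unpenalized intercept:

In [ ]:
# Single-feature lasso via soft-thresholding of the centered least-squares statistics
n = X.shape[0]
Xc = X - X.mean(axis = 0)
Yc = Y - Y.mean(axis = 0)
rho = (Xc.T @ Yc).item() / n                             # feature/target correlation term
z = (Xc.T @ Xc).item() / n
w_lasso = np.sign(rho) * max(abs(rho) - alpha, 0) / z    # soft-thresholding update
b_lasso = Y.mean() - X.mean() * w_lasso                  # intercept of the centered fit
print(b_lasso, w_lasso)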

In [82]:
plt.plot(X, Y, 'bo')                 # actual target values
plt.plot(X, y_hat, color = 'red')    # fitted lasso line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()



In [83]:
# sklearn also provides the lasso_path function, which computes the coefficients
# along the full path of possible alpha values; it is useful for lower-level tasks.
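
A minimal usage sketch (not in the original notebook): lasso_path returns the grid of alphas it used together with the coefficients at each alpha, which can be plotted as a regularization path.

In [ ]:
# Compute and plot the lasso coefficient path over an automatically chosen alpha grid
alphas_path, coefs_path, _ = linear_model.lasso_path(X, Y.ravel(), eps = 0.001, n_alphas = 100)
plt.plot(alphas_path, coefs_path.T)
plt.xlabel('alpha')
plt.ylabel('Coefficient value')
plt.show()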

In [88]:
# To do cross-validation for alpha with lasso, use the LassoCV class
reg = linear_model.LassoCV(eps = 0.001, n_alphas = 3, alphas = None, fit_intercept = True, normalize = False, precompute = 'auto', verbose = False, n_jobs = -1, positive = False, selection = 'cyclic')
reg.fit(X, Y.ravel())
print(reg.intercept_, reg.coef_)
# eps -> length of the path (the ratio alpha_min / alpha_max)
# alphas -> list of alphas to try; if None, they are chosen automatically


152.1334841628967 [948.48582512]
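
A small follow-up (not in the original cell): LassoCV also exposes the alpha it selected and the grid of alphas it searched.

In [ ]:
# Alpha chosen by cross-validation and the candidate alphas that were evaluated
print(reg.alpha_)
print(reg.alphas_)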

4) Multi-task Lasso


In [ ]:


In [ ]:
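
A minimal usage sketch to fill in this section (not from the original notebook): MultiTaskLasso applies a mixed L1/L2 penalty across several targets so that all tasks share the same selected features. The two-column target built below (the original target plus a noisy copy) is purely illustrative.

In [ ]:
# Illustrative only: stack the diabetes target with a noisy copy to get two tasks,
# then fit MultiTaskLasso so both tasks share the same sparsity pattern
rng = np.random.RandomState(0)
Y_multi = np.hstack([Y, Y + 10*rng.randn(*Y.shape)])
reg = linear_model.MultiTaskLasso(alpha = 0.8, fit_intercept = True, max_iter = 40000)
reg.fit(X, Y_multi)
print(reg.intercept_, reg.coef_)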